/*******************************************************************************
																				
	DESCRIPTION: 	This do-file investigates the predictive power of SILC variables.
					
*******************************************************************************/

* Start from an empty session
clear all
* Identifier used as the file-name prefix of the exported LaTeX table
global id_code 120
* Turn -pause- on so that pause statements, if any, interrupt execution
pause on

* Separator inserted between the model name and the year in input file names
local vars _
* Model whose yearly prediction files are combined below
local model Full

*******************************************************************************
 * Combine the predictions made for each year
*******************************************************************************

* The time span depends on the model. The file for yearStart is loaded with
* -use-; the files for the years in yearSpan are appended to it.
if "`model'"=="Full" {
	local yearStart 1992
	local yearEnd 2016
	local yearSpan 1993/2016
}
else {
	* Without this guard an unknown model leaves the year macros empty and the
	* script only fails further down with a cryptic file or syntax error.
	display as error "No time span is defined for model `model'"
	exit 198
}

* Now we combine the predictions for the model using the time span set above
use "${data}/003_MainWithEnsemblePred_`model'`vars'`yearStart'.dta", clear

gen DataYear = `yearStart' // DataYear indicates which yearly file each individual comes from

forval year = `yearSpan' {
	append using "${data}/003_MainWithEnsemblePred_`model'`vars'`year'.dta"
	* Only the rows appended in this iteration still have DataYear missing
	replace DataYear = `year' if DataYear==.
}

*******************************************************************************
 * Clean and prepare the data
*******************************************************************************

/* Blank out every prediction that has no observed outcome to be compared with.
An individual may have received a prediction in two different years (the Xs are
available in both) while the outcome is non-missing in only one of them.
Removing the unmatched prediction now avoids carrying two different predictions
for the same person into the step that reduces the data to one observation per
spell. */

local horizon 6

foreach spellMonth of numlist 0 6 {

	local outcome emplAft`horizon'M_`spellMonth'M_In
	replace p_`outcome' = . if `outcome' == .

}


/* Spells that take place in at least two calendar years often come with two
observations, because predictions are created separately for each year.
Importantly, each dependent variable is still predicted only once (in the year
in which the relevant month of unemployment is reached). The within-spell mean
therefore copies the available value to all observations of the spell, after
which a single observation per spell is kept. */

sort LopNr_PersonNr InLnr DataYear

local horizon 6

foreach spellMonth of numlist 0 6 {

	local outcome emplAft`horizon'M_`spellMonth'M_In

	* Spell-level means of the prediction and of the observed outcome
	egen pe_`outcome' = mean(p_`outcome'), by(LopNr_PersonNr InLnr)
	egen e_`outcome' = mean(`outcome'), by(LopNr_PersonNr InLnr)

	* Swap the original variables for their spell-level counterparts
	drop p_`outcome' `outcome'
	rename (pe_`outcome' e_`outcome') (p_`outcome' `outcome')

}

* Keep the first observation of every spell
duplicates drop LopNr_PersonNr InLnr, force

*******************************************************************************
 * Merge with survey data
*******************************************************************************
* Restrict the data to the variables used from here on, shrink their storage
* types, and attach the survey file by person and year (matched rows only)
keep Lop* In* year n inSample emplAft6M_0M_In p_emplAft6M_0M_In startU trueEnd duration DataYear
compress
merge m:1 LopNr_PersonNr year using "${data}/001_11_SILCData.dta", keep(match) nogenerate

*******************************************************************************
 * Run a regression of empirical JFR on predicted JFR and survey variables
*******************************************************************************	
* Indices that select the outcome variable analysed in this section
local months 6
local unempl 0

* Short-hands for the observed outcome and its prediction
local y emplAft`months'M_`unempl'M_In
local yhat p_emplAft`months'M_`unempl'M_In

* Define sets of variables
local indivVars lifeSatisf meaningLife beingNervous feelingDown feelingDepre
local combinedVars pc1
local largerSample genHealth LTIllness limitActiv
local HealthPC HealthPC1

* All regressions.
* For every set, three models are stored, estimated on the observations with
* all variables of the set and the prediction non-missing:
*   SET_pred       outcome on the prediction and year dummies
*   SET_pred_vars  outcome on the prediction, year dummies and the set
*   SET_vars       outcome on the set only
* For largerSample and HealthPC the same three models are estimated a second
* time on the observations that also have every variable of indivVars
* non-missing; these are stored with _mhs inserted after the set name.
foreach set in indivVars combinedVars largerSample HealthPC {

	local variants base
	if inlist("`set'", "largerSample", "HealthPC") {
		local variants base mhs
	}

	foreach variant of local variants {

		if "`variant'" == "base" {
			local tag
			local required ``set''
		}
		else {
			local tag _mhs
			local required `indivVars' ``set''
		}

		preserve

		* Restrict to complete cases of the required variables and the prediction
		foreach v of local required {
			drop if `v' ==.
		}
		drop if `yhat' == .

		reg `y' `yhat' i.year
		eststo `set'`tag'_pred

		reg `y' `yhat' i.year ``set''
		eststo `set'`tag'_pred_vars

		reg `y' ``set''
		eststo `set'`tag'_vars

		restore
	}
}
	
	
* Final table:
* Collect the formatted coefficients, standard errors and fit statistics used
* by the LaTeX table. Each table column is named GROUP followed by K, where
*   GROUP mh    uses the combinedVars estimates (regressor pc1)
*   GROUP gh    uses the HealthPC estimates (regressor HealthPC1)
*   GROUP ghxmh uses the HealthPC_mhs estimates (regressor HealthPC1)
*   K = 1 is the pred model, 2 the pred_vars model, 3 the vars model
local yhat p_emplAft`months'M_`unempl'M_In

local est_mh combinedVars
local est_gh HealthPC
local est_ghxmh HealthPC_mhs

local pc_mh pc1
local pc_gh HealthPC1
local pc_ghxmh HealthPC1

local spec1 pred
local spec2 pred_vars
local spec3 vars

foreach grp in mh gh ghxmh {
	forvalues k = 1/3 {

		estimates restore `est_`grp''_`spec`k''
		local col `grp'`k'

		* The prediction is a regressor in models 1 and 2 only
		if `k' <= 2 {
			local `col'_b_pred: di %5.3f `= _b[`yhat']'
			local `col'_se_pred: di %5.3f `= _se[`yhat']'
		}

		* The PC1 regressor is part of models 2 and 3 only
		if `k' >= 2 {
			local pcvar `pc_`grp''
			local `col'_b_pc: di %5.3f `= _b[`pcvar']'
			local `col'_se_pc: di %5.3f `= _se[`pcvar']'
		}

		local `col'_r2: di %5.3f `= e(r2)'
		local `col'_r2a: di %5.3f `= e(r2_a)'
		local `col'_n: di %5.0f `= e(N)'
	}
}

		
* New format:
* Write the nine-column regression table as a LaTeX tabular. The document and
* table wrappers are written as LaTeX comments (leading %), so the file can be
* pulled into another document as is.
* Columns: (1)-(3) gh*, (4)-(6) mh*, (7)-(9) ghxmh* macros collected above.

* Release the handle in case an earlier, interrupted run left it open;
* -file open- fails on a handle that is already in use.
capture file close myfile
file open myfile using "${output}/${id_code}_Regression_JFR_SILC_Appendix_table.tex", write replace

file write myfile "%\documentclass{article}" _newline ///
	"%\usepackage{booktabs}" _newline ///
	"%\usepackage[margin=1in]{geometry}" _newline ///
	"%\begin{document}" _newline ///
	"%\begin{table}[h] \centering" _newline ///
	"\footnotesize \begin{tabular}{@{\extracolsep{8pt}} l  c c c  c c c  c c c}" _newline ///
	"\hline \hline \addlinespace[3ex]" _newline ///
	///
	"& \multicolumn{3}{c}{\textbf{General Health (GH)}} & \multicolumn{3}{c}{\textbf{Mental Health (MH)}} & \multicolumn{3}{c}{\textbf{GH for MH sample}} \\" _newline ///
	"\cline{2-4} \cline{5-7} \cline{8-10} \addlinespace[1.5ex]" _newline ///
	"& (1) & (2) & (3) & (4) & (5) & (6) & (7) & (8) & (9) \\ \addlinespace[1.5ex]" _newline ///
	"\hline \addlinespace[1.5ex]" _newline ///
	"Pred. JFR & `gh1_b_pred' & `gh2_b_pred' &  & `mh1_b_pred' & `mh2_b_pred' &  & `ghxmh1_b_pred' & `ghxmh2_b_pred' & \\" _newline ///
	 "& (`gh1_se_pred') & (`gh2_se_pred') &  & (`mh1_se_pred') & (`mh2_se_pred') &  & (`ghxmh1_se_pred') & (`ghxmh2_se_pred') & \\" _newline ///
	"Health PC1 &  & `gh2_b_pc' & `gh3_b_pc'  &  & & &  & `ghxmh2_b_pc' & `ghxmh3_b_pc' \\" _newline ///
	 "&  & (`gh2_se_pc') & (`gh3_se_pc') & & & &  & (`ghxmh2_se_pc') & (`ghxmh3_se_pc') \\ \addlinespace[1.5ex]" _newline ///
	"Mental Health PC1 & & & &  & `mh2_b_pc' & `mh3_b_pc' & & & \\" _newline ///
	 "& & & &  & (`mh2_se_pc') & (`mh3_se_pc') &  &  & \\" _newline ///
	 "\hline \addlinespace[1.5ex]" _newline ///
	"\( R^2 \) & `gh1_r2' & `gh2_r2' & `gh3_r2' & `mh1_r2' & `mh2_r2' & `mh3_r2' & `ghxmh1_r2' & `ghxmh2_r2' & `ghxmh3_r2' \\" _newline ///
	"Adj. \( R^2 \) & `gh1_r2a' & `gh2_r2a' & `gh3_r2a' & `mh1_r2a' & `mh2_r2a' & `mh3_r2a' & `ghxmh1_r2a' & `ghxmh2_r2a' & `ghxmh3_r2a' \\" _newline ///
	"\( N \) & `gh1_n' & `gh2_n' & `gh3_n' & `mh1_n' & `mh2_n' & `mh3_n' & `ghxmh1_n' & `ghxmh2_n' & `ghxmh3_n' \\" _newline ///
	///
	"\addlinespace[3ex]" _newline ///
	"\hline \hline \addlinespace[1.5ex]" _newline ///
	"\end{tabular}" _newline ///
	"%\end{table}" _newline ///
	"%\end{document}"
file close myfile	

